# AN EXTRAVERSION AFFAIR: AFRICAN STATES AND LARGESCALE LAND ACQUISITION SUBNATIONAL LOCATIONS
# Version of the script: 12/04/2022
# C�cile Richetta, University of Geneva, Department of Political Science and International Relations 

# Replication Appendix C
  # I. REPLICATION MODEL 2 WITHOUT DUMMIES
  # II. REPLICATION MODEL 1A, 1B AND 2 WITH T--2 COVARIATES
  # III. REPLICATION MODEL 2 WITH FULL FIXED EFFECTS 

# /!\ University of Geneva High Performance Computing Services were used to estimate all the models. 
# Be careful not to overload and crash your computer

# packages for weight matrix
library(maptools)
library(fields)
library(spdep)
library(data.table)
library(Matrix)
library(base)

# packages for analysis 
library(car)
library(foreign)
library(plyr)
library(dplyr)
library(fastDummies)
library(splm)
library(spmle)
library(plm)
library(psych)
spdep::set.ZeroPolicyOption(TRUE)
spdep::set.ZeroPolicyOption(TRUE)

options(scipen=999)
options(digits=5)

# open data and weight matrix
setwd("")
# data on land deals
load("data.RData")
colnames(data)[1] <- "id"
colnames(data)[2] <- "time"

# --------------------------------- APPENDIX C : LAG t--2 COVARIATES --------------------------------------#

# Table 1. Reproduction of Model 1B and Model 2: Explanatory Variables lagged 2 years. Dependent variable: LSLA OCCURRENCE.
  # weight matrix 100km inverse distance
  setwd("") # work laptop
  W <- readMM(file = "W.mtx") # matrix
  W <- as(W, "dgCMatrix") # to dgc format
  W_lw <- mat2listw(W, style="W")
  
  # Reproduction of Model 1B SPMLE
  N <- nrow(data)/11
  TT <- 11
  has_temporal_lag <- TRUE
  has_spatial_lag <- TRUE
  zero_period <- TRUE
  y <- data$nllabi
  datal2 <- data[, c(14:21, 27:57, 59:68)]
  X1 <- cbind(1, as.matrix(datal2)) 
  ## SPLME ANALYSIS
  model1 <- STModel$new(y = y, X = X1,
                        N = N, TT = TT,
                        W_t = W,
                        has_temporal_lag = has_temporal_lag,
                        has_spatial_lag = has_spatial_lag,
                        zero_period = zero_period,
                        use_py = TRUE)  # use python speed-up!
  ## ML fit
  ml1.fit <- model1$autotrain()
  ## Summary
  summary(ml1.fit)
  saveRDS(ml1.fit, "Model1B-replication1.rds")

  # Reproduction Model 2 SPML
  fm <- nllabi ~ nle_l2*pop_l2 + nle_l2*urban_l2 + nle_l2*crop_l2 + nle_l2*rainsum_l2 +
    nle_l2*water_l2 + nle_l2*polvio_l2 + gridarea + year_2009 + year_2010 + year_2011 + 
    year_2012 + year_2013 + year_2014 + year_2015 + year_2016 + year_2017 + year_2018 + 
    ctyn_3 + ctyn_5 + ctyn_6 + ctyn_7 + ctyn_8 + ctyn_9 + ctyn_10 + ctyn_11 + 
    ctyn_12 + ctyn_13 + ctyn_15 + ctyn_16 + ctyn_17 + ctyn_18 + ctyn_19 + ctyn_20 + ctyn_21 +
    ctyn_23 + ctyn_24 + ctyn_25 + ctyn_26 + ctyn_27 + ctyn_28 + ctyn_29 + ctyn_30 + ctyn_31 +
    ctyn_32 + ctyn_33 + ctyn_34 + ctyn_35 + ctyn_36
  model2 <- spreml(fm, data = data, w=W_lw,
                   error="sem2srre", Hess = FALSE, lag=T, zero.policy=TRUE)
  summary(model2)
  saveRDS(model2, "Model2-replication1.rds")

# Table 2. Reproduction of Model 1B and Model 2: Queen contiguity weight matrix. Dependent variable: LSLA OCCURRENCE. 
  # weight matrix queen contiguity
  setwd("/home/users/r/richetta") # work laptop
  W <- readMM(file = "Wqueen.mtx") # matrix
  W <- as(W, "dgCMatrix") # to dgc format
  W_lw <- mat2listw(W, style="W")
  
  # Reproduction of Model 1B SPMLE
  datal1 <- data[, c(7:13, 21, 27:57, 59:68)]
  X1 <- cbind(1, as.matrix(datal1)) 
  ## SPLME ANALYSIS
  model3 <- STModel$new(y = y, X = X1,
                        N = N, TT = TT,
                        W_t = W,
                        has_temporal_lag = has_temporal_lag,
                        has_spatial_lag = has_spatial_lag,
                        zero_period = zero_period,
                        use_py = TRUE)  # use python speed-up!
  ## ML fit
  ml3.fit <- model3$autotrain()
  ## Summary
  summary(ml3.fit)
  saveRDS(ml3.fit, "Model1b-reproduction2.rds")
  
  # Reproduction Model 2 SPML
  fm <- nllabi ~ nle_l1*pop_l1 + nle_l1*urban_l1 + nle_l1*crop_l1 + nle_l1*rainsum_l1 +
    nle_l1*water_l1 + nle_l1*polvio_l1 + gridarea + year_2009 + year_2010 + year_2011 + 
    year_2012 + year_2013 + year_2014 + year_2015 + year_2016 + year_2017 + year_2018 + 
    ctyn_3 + ctyn_5 + ctyn_6 + ctyn_7 + ctyn_8 + ctyn_9 + ctyn_10 + ctyn_11 + 
    ctyn_12 + ctyn_13 + ctyn_15 + ctyn_16 + ctyn_17 + ctyn_18 + ctyn_19 + ctyn_20 + ctyn_21 +
    ctyn_23 + ctyn_24 + ctyn_25 + ctyn_26 + ctyn_27 + ctyn_28 + ctyn_29 + ctyn_30 + ctyn_31 +
    ctyn_32 + ctyn_33 + ctyn_34 + ctyn_35 + ctyn_36
  model4 <- spreml(fm, data = data, w=W_lw,
                   error="sem2srre", Hess = FALSE, lag=T, zero.policy=TRUE)
  summary(model4)
  saveRDS(model4, "Model2-replication2.rds")

# ------------------------ APPENDIX D : REPRODUCTION MODEL 2 FIXED EFFECTS ---------------------------#
  
# Table 1. Reproduction of Model 2, Random versus Fixed Effects ("Within") Specification. 
  # weight matrix 100km inverse distance
  setwd("") # work laptop
  W <- readMM(file = "W.mtx") # matrix
  W <- as(W, "dgCMatrix") # to dgc format
  W_lw <- mat2listw(W, style="W")
  # equation without any time-invariant variables for Hausmann test
  fm <- nllabi ~ nle_l1*pop_l1 + nle_l1*urban_l1 + nle_l1*crop_l1 + nle_l1*rainsum_l1 +
    nle_l1*water_l1 + nle_l1*polvio_l1
  # Reproduction Model 2 RANDOM Specification
  model5a <- spreml(fm, data = data, w=W_lw,
                   error="sem2srre", Hess = FALSE, lag=T, zero.policy=TRUE)
  summary(model5a)
  saveRDS(model5a, "Model2-replication3a.rds")
  # Reproduction Model 2 WITHIN specification
  model5b <- spml(fm, data = data, listw = mat2listw(W, style = "W"),
                  model="within", spatial.error="kkp", Hess = FALSE, 
                  zero.policy=TRUE)
  summary(model5b)
  saveRDS(model5b, "Model2-replication3b.rds")
  htest <- sphtest(model5a, model5b) # Hausman test fully random versus fully within 
  htest # p = 0.000

  rm(list= ls())
# ---------------------------------------- END SCRIPT ------------------------------------------#
  
  